#loading the libraries.
library(tidyverse) 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2) 
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(readxl)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(treemapify)
library(dplyr)

# Electronics data set: week, number of commercials and sales volume ----
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this file exists.
electronics <- read_xlsx("C:\\Users\\User\\Downloads\\electronics.xlsx")
colnames(electronics)
## [1] "Week"               "No. of Commercials" "Sales Volume"
# View() opens a data viewer, so only call it in an interactive session;
# it is not useful (and may fail) when the script is run non-interactively.
if (interactive()) {
  View(electronics)
}
# Convert the spreadsheet headers to lowerCamelCase column names.
electronics <- clean_names(electronics, "lower_camel")

# Scatter plot with a linear-model trend line (confidence band disabled).
scatter <- ggplot(
  data = electronics,
  mapping = aes(x = noOfCommercials, y = salesVolume)
) +
  geom_point() +
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  geom_smooth(se = FALSE, method = "lm") +
  labs(x = "Number of Commercials", y = "Sales Volume")
ggplotly(scatter)
## `geom_smooth()` using formula = 'y ~ x'
# Kirkland regional data set: one row per month with north/south columns ----
kirklandregional <- read_xlsx("C:\\Users\\User\\Downloads\\kirklandregional.xlsx")
# Data cleaning: convert headers to lowerCamelCase column names.
kirklandregional <- clean_names(kirklandregional, "lower_camel")
colnames(kirklandregional)
## [1] "month" "north" "south"
unique(kirklandregional$month)
##  [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
# Turn month into an ordered factor in calendar order so the x axis is not
# sorted alphabetically. `month.abb` is base R's constant "Jan", ..., "Dec";
# the argument is spelled `levels` in full rather than relying on partial
# matching of `level`.
kirklandregional$month <- ordered(kirklandregional$month, levels = month.abb)

# Plotting line graphs.
# `group = 1` is needed because month is a factor: it makes geom_line()
# connect all months as a single series.
line1 <- ggplot(
  data = kirklandregional,
  mapping = aes(x = month, y = north, group = 1)
) +
  geom_line(color = "blue") +
  labs(x = "Month", y = "North")
ggplotly(line1)

line2 <- ggplot(
  data = kirklandregional,
  mapping = aes(x = month, y = south, group = 1)
) +
  geom_line(color = "orange") +
  labs(x = "Month", y = "South")
ggplotly(line2)

# Both series on the same axes: north in blue, south in orange.
line3 <- ggplot(data = kirklandregional, mapping = aes(x = month, group = 1)) +
  geom_line(aes(y = north), color = "blue") +
  geom_line(aes(y = south), color = "orange") +
  labs(x = "Month", y = "Sales ($10000)")
ggplotly(line3)
# Accounts-managed data set: one row per manager ----
accounts_managed <- read_xlsx("C:\\Users\\User\\Downloads\\accounts_managed.xlsx")
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(accounts_managed)
}
# Convert the spreadsheet headers to lowerCamelCase column names.
accounts_managed <- clean_names(accounts_managed, "lower_camel")
colnames(accounts_managed)
## [1] "manager"         "accountsManaged"

# Horizontal bar chart; managers are ordered on the y axis by the number of
# accounts they manage. geom_col() is the direct form of
# geom_bar(stat = "identity"): bar length is taken from the data as-is.
bar <- ggplot(
  data = accounts_managed,
  mapping = aes(
    x = accountsManaged,
    y = reorder(manager, accountsManaged)
  )
) +
  geom_col(fill = "skyblue") +
  labs(x = "Accounts Managed", y = "Managers")
ggplotly(bar)
# Billionaires data set ----
# Read the workbook and convert its headers to lowerCamelCase in one pipeline.
billionaires <- "C:\\Users\\User\\Downloads\\billionaires.xlsx" %>%
  read_xlsx() %>%
  clean_names("lower_camel")

# Bubble chart: billionaires per 10M residents (x) against per-capita income
# (y); bubble size is the number of billionaires and colour is the country.
# The legend is hidden.
Buuble_chart <- billionaires %>%
  ggplot(
    aes(
      x = billionairesPer10MResidents,
      y = perCapitaIncome,
      size = numberOfBillionaires,
      color = country
    )
  ) +
  geom_point(alpha = 0.7) +
  scale_size(name = "Number of Billionaires") +
  labs(x = "Billionaires Per 10M Residents", y = "Per Capita Income") +
  theme(legend.position = "none")
ggplotly(Buuble_chart)
# Stacked and clustered column charts from the kirklandregional data set ----
# First unpivot the table: the north and south columns become two rows per
# month, with the column name in `direction` and the number in `values`.
Newkirklandregional <- kirklandregional %>%
  pivot_longer(
    cols = north:south,
    names_to = "direction",
    values_to = "values"
  )
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(Newkirklandregional)
}
# Capitalise the labels for display; anything that is not "north" is
# labelled "South" (the pivot above only produces those two names).
Newkirklandregional$direction <- if_else(
  Newkirklandregional$direction == "north",
  "North",
  "South"
)

# Stacked column diagram: both directions stacked in one bar per month.
# geom_col() is the direct form of geom_bar(stat = "identity").
stacked <- ggplot(
  Newkirklandregional,
  aes(x = month, y = values, fill = direction)
) +
  geom_col() +
  labs(
    title = "Stacked Column Diagram",
    x = "Month",
    y = "Values",
    fill = "Direction"
  )
ggplotly(stacked)

# Clustered column diagram: the two directions side by side for each month.
clustered <- ggplot(
  Newkirklandregional,
  aes(x = month, y = values, fill = direction)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Clustered Column Diagram",
    x = "Month",
    y = "Values",
    fill = "Direction"
  )
ggplotly(clustered)
# Global 100 data set: continent, country, company and market value ----
global100 <- read_xlsx("C:\\Users\\User\\Downloads\\global100.xlsx")
str(global100)
## tibble [100 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Continent                   : chr [1:100] "Asia" "Asia" "Asia" "Asia" ...
##  $ Country                     : chr [1:100] "China" "China" "China" "China" ...
##  $ Company                     : chr [1:100] "Agricultural Bank of China" "Bank of China" "China Construction Bank" "ICBC" ...
##  $ Market Value (Billions US $): num [1:100] 141 124 174 216 202 ...
# Convert the spreadsheet headers to snake_case column names.
global100 <- global100 %>%
  clean_names("snake")

# Treemap: one tile per company, tile area given by market value, tile
# colour by continent, and the company name printed inside the tile.
global100 %>%
  ggplot(
    aes(
      area = market_value_billions_us,
      fill = continent,
      label = company
    )
  ) +
  geom_treemap(color = "grey") +
  geom_treemap_text(size = 8) +
  labs(fill = "Continent")

# World GDP 2014 data set: GDP and GDP growth per country ----
worldgdp2014 <- read_xlsx("C:\\Users\\User\\Downloads\\worldgdp2014.xlsx")
str(worldgdp2014)
## tibble [190 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Country Name            : chr [1:190] "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ GDP 2014 (Billions US $): num [1:190] 20.05 13.22 213.52 126.78 1.22 ...
##  $ GDP Growth 2014 (%)     : num [1:190] -1.71 1.91 1.81 1.43 3.55 ...
# Convert the spreadsheet headers to snake_case column names.
worldgdp2014 <- clean_names(worldgdp2014, "snake")
n_distinct(worldgdp2014$country_name)
## [1] 190

# World polygon outlines; rename `region` so it can be joined to the GDP
# table on `country_name`.
# NOTE(review): the join is an exact match on the country name. Any country
# spelled differently in the two sources gets NA GDP values and is dropped by
# the filter below -- verify that no expected countries are missing.
mapdata <- map_data("world")
mapdata <- dplyr::rename(mapdata, country_name = region)
mapdata1 <- left_join(mapdata, worldgdp2014, by = "country_name")

# Keep only polygon points that have both GDP figures. Each column is tested
# on its own: combining the numeric columns with `&` first would coerce them
# to logical, and because `NA & FALSE` is FALSE (not NA), a row with one
# missing value and a 0 in the other column would slip through.
mapdata1 <- mapdata1 %>%
  filter(
    !is.na(gdp_2014_billions_us),
    !is.na(gdp_growth_2014_percent)
  )

# Choropleth: countries filled by 2014 GDP on a yellow-to-red gradient, with
# axis titles, axis text and background rectangles removed.
map <- ggplot(
  mapdata1,
  aes(x = long, y = lat, group = group, label = country_name)
) +
  geom_polygon(aes(fill = gdp_2014_billions_us), color = "black") +
  scale_fill_gradient(
    name = "GDP 2014 Billions ($)",
    low = "yellow",
    high = "red"
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    rect = element_blank()
  )
ggplotly(map)
# Home sales (stacked layout) data set ----
# Read the workbook and convert its headers to snake_case in one pipeline.
homesalesstacked <- "C:\\Users\\User\\Downloads\\homesalesstacked.xlsx" %>%
  read_xlsx() %>%
  clean_names("snake")

# Box plot of selling price for each location. Boxes are coloured by
# location, the y axis uses comma-separated labels, and the legend is hidden.
box <- homesalesstacked %>%
  ggplot(aes(x = location, y = selling_price, fill = location)) +
  geom_boxplot() +
  scale_y_continuous(labels = scales::comma) +
  labs(x = "Location", y = "Selling Price") +
  theme(legend.position = "none")
ggplotly(box)
# Reference: R Core Team (2023). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.